Load data

# Sidewalk cafe licences and applications (cleaned CSV), located relative to
# the project root via here::here().
cafe =
  here::here("data/Sidewalk_Caf__Licenses_and_Applications_clean.csv") %>%
  read_csv()
## Parsed with column specification:
## cols(
##   .default = col_character(),
##   zip = col_double(),
##   swc_sq_ft = col_double(),
##   swc_tables = col_double(),
##   swc_chairs = col_double(),
##   lat = col_double(),
##   long = col_double(),
##   community_district = col_double(),
##   city_council_district = col_double(),
##   app_sq_ft = col_double(),
##   app_tables = col_double(),
##   app_chairs = col_double(),
##   app_status_date = col_datetime(format = ""),
##   expiration_date = col_datetime(format = ""),
##   app_too_date = col_datetime(format = ""),
##   submit_date = col_datetime(format = ""),
##   intake_dd = col_datetime(format = ""),
##   send_package_dd = col_datetime(format = ""),
##   cp_dd = col_datetime(format = ""),
##   cb_dd = col_datetime(format = ""),
##   hearing_dd = col_datetime(format = "")
##   # ... with 4 more columns
## )
## See spec(...) for full column specifications.
# Cleaned 2021 parking-violation records, located relative to the project root.
parking =
  here::here("data/parking_vio2021_cleanv1.csv") %>%
  read_csv()
## Parsed with column specification:
## cols(
##   id = col_double(),
##   summons_number = col_double(),
##   registration_state = col_character(),
##   issue_date = col_datetime(format = ""),
##   violation_code = col_double(),
##   vehicle_make = col_character(),
##   hour = col_double(),
##   min = col_character(),
##   violation_county = col_character(),
##   house_number = col_character(),
##   street_name = col_character(),
##   intersecting_street = col_character(),
##   vehicle_color = col_character(),
##   vehicle_year = col_double(),
##   address = col_character(),
##   long = col_double(),
##   lat = col_double(),
##   borough = col_character()
## )

Cafe Map

# Count parking tickets per street and attach that count to every cafe row.
# right_join() keeps all cafes, including those on streets with no tickets.
plot_cafe_map =
  parking %>%
  count(street_name, name = "ticket") %>%
  # Name the key explicitly instead of relying on the implicit natural join.
  right_join(cafe, by = "street_name") %>%
  # count() produces an integer column; tidyr >= 1.2 refuses to cast a
  # fractional replacement to integer, so widen to double first.
  # The tiny positive placeholder keeps log(ticket) finite for cafes on
  # streets without any recorded ticket.
  mutate(ticket = replace_na(as.double(ticket), 1e-10))
## Joining, by = "street_name"
# Continuous colour scale for the cafe markers: the "plasma" palette mapped
# over the log of the per-street ticket counts.
pal = colorNumeric(
  palette = "plasma",
  domain = log(plot_cafe_map$ticket)
)

# Turn the joined cafe/ticket table into an interactive leaflet map:
# one small circle per cafe, coloured by log ticket count, with an HTML popup
# giving the business name and the rounded ticket count.
plot_cafe_map =
  plot_cafe_map %>%
  mutate(
    pop = str_c("<b>", business_name, "</b><br>", round(ticket), " tickets")
  ) %>%
  leaflet() %>%
  addProviderTiles(providers$CartoDB.Positron) %>%
  addCircleMarkers(
    lng = ~long,
    lat = ~lat,
    radius = 0.1,
    color = ~pal(log(ticket)),
    popup = ~pop
  )

# Render the map.
plot_cafe_map
# Scatter of violation locations (longitude vs. latitude), coloured by borough.
vio = parking %>%
  ggplot(aes(x = long, y = lat, color = borough)) +
  # Transparency has to be set on the geom: an `alpha` argument handed to
  # ggplot() is silently ignored. The original value (0.0001) is below what an
  # 8-bit alpha channel can render (1/255), so a small but visible value is
  # used here to tame overplotting; tune as needed.
  geom_jitter(alpha = 0.05)

# Render the plot.
vio
## Warning: Removed 247 rows containing missing values (geom_point).

Plot violations vs. time

Convert and format the issue date

# Time-analysis table: derive the calendar month and day of month from the
# issue timestamp, keeping only the columns needed for the time plots.
parking_time =
  parking %>%
  mutate(
    month = lubridate::month(issue_date),
    day = lubridate::day(issue_date)
  ) %>%
  select(issue_date, summons_number, vehicle_make, hour, min, month, day)

Make line plots: violation vs month

# Line plot of the number of violations issued in each calendar month.
parking_time %>%
  # Rows with a missing month (presumably an unparseable issue date) cannot be
  # plotted; drop them explicitly instead of letting plotly discard them with
  # an "Ignoring observations" warning.
  filter(!is.na(month)) %>%
  count(month) %>%
  # "lines" is the valid plotly scatter mode flag; "line" is not recognised.
  plot_ly(x = ~month, y = ~n, type = "scatter", mode = "lines") %>%
  layout(
    title = "Violations per Month",
    xaxis = list(
      type = "category",
      title = "Month"
    ),
    yaxis = list(
      title = "Count of violations"
    )
  )
## `summarise()` ungrouping output (override with `.groups` argument)
## Warning: Ignoring 1 observations
## Warning: `arrange_()` is deprecated as of dplyr 0.7.0.
## Please use `arrange()` instead.
## See vignette('programming') for more help
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_warnings()` to see where this warning was generated.

Line plot: violations vs. weekday

library(readr)

# Only the issue timestamp and the summons number are needed for the
# weekday tally.
parking_day = select(parking, issue_date, summons_number)

# Calendar order (Monday first) used to sort the weekday axis.
day_order = c(
  "Monday", "Tuesday", "Wednesday", "Thursday",
  "Friday", "Saturday", "Sunday"
)

# Line plot of the number of violations issued on each day of the week.
parking_day %>%
  # Rows without an issue date have no weekday; drop them explicitly rather
  # than letting plotly discard them with a warning.
  filter(!is.na(issue_date)) %>%
  mutate(
    # weekdays() returns locale-dependent names, which would not match the
    # English labels in day_order under a non-English locale. Index day_order
    # by the numeric weekday instead (1 = Monday with week_start = 1).
    day_week = factor(
      day_order[lubridate::wday(issue_date, week_start = 1)],
      levels = day_order
    )
  ) %>%
  # count() on a factor returns rows in level order, i.e. Monday..Sunday.
  count(day_week) %>%
  # "lines" is the valid plotly scatter mode flag; "line" is not recognised.
  plot_ly(x = ~day_week, y = ~n, type = "scatter", mode = "lines") %>%
  layout(
    title = "Violations by weekday",
    xaxis = list(
      type = "category",
      title = "Weekday"
    ),
    yaxis = list(
      title = "Count of violations"
    )
  )
## `summarise()` ungrouping output (override with `.groups` argument)
## Warning: Ignoring 1 observations

Line plot: violations vs. hour

# Line plot of the number of violations issued in each hour of the day.
parking_time %>%
  # Keep only whole clock hours 0-23. This removes the malformed 12.3 entry
  # (previously excluded via a floating-point equality test), any other
  # out-of-range value, and missing hours.
  filter(hour %in% 0:23) %>%
  count(hour) %>%
  # "lines" is the valid plotly scatter mode flag; "line" is not recognised.
  plot_ly(x = ~hour, y = ~n, type = "scatter", mode = "lines") %>%
  layout(
    title = "Violations per Hour",
    xaxis = list(
      type = "category",
      title = "Hour",
      range = c(0, 23)
    ),
    yaxis = list(
      title = "Count of violations"
    )
  )
## `summarise()` ungrouping output (override with `.groups` argument)